import requests
from bs4 import BeautifulSoup
import json
import re

# Fetch the HTML for the GS1 Application Identifier reference page.
url = "https://ref.gs1.org/ai/"
page = requests.get(url)

# The page embeds its translation table in a <script id="translatedText">
# element: one tab-separated row per line, the first row being the header.
soup = BeautifulSoup(page.text, 'html.parser')
script_node = soup.find('script', id='translatedText')
raw_text = script_node.get_text().strip()
print(raw_text)
lines = raw_text.split('\n')
print("langs_row", "=" * 100)
print(lines)
langs_tab = [line.split('\t') for line in lines]
print("langs_tab", "=" * 100)
print(langs_tab)
# First row is the header; its leading column is the AI code column,
# not a language, so drop it from the titles list.
titles = langs_tab.pop(0)
titles.pop(0)
# Accumulator: language/title -> {AI code -> translated text}.
ai_langs = {}


def apd_langs(_ai, _row, _titles=None, _langs=None):
    """Record one AI code's translations, one entry per language column.

    Pairs each cell of *_row* with the corresponding language key in
    *_titles* and stores it as ``_langs[language][_ai] = cell``.

    Args:
        _ai: the Application Identifier code (string key).
        _row: the translated cells for this AI, in title order.
        _titles: language keys; defaults to the module-level ``titles``.
        _langs: target mapping; defaults to the module-level ``ai_langs``.
            Passing these explicitly makes the function testable without
            touching module globals (backward compatible: existing callers
            keep mutating the globals).
    """
    _titles = titles if _titles is None else _titles
    _langs = ai_langs if _langs is None else _langs
    # zip truncates to the shorter of the two, matching the original loop.
    for _col, _key in zip(_row, _titles):
        _langs.setdefault(_key, {})[_ai] = _col


# Expand each table row into per-language entries keyed by AI code.
for row in langs_tab:
    ai = row[0]
    # Rows whose AI column starts with "n" look like placeholders —
    # NOTE(review): confirm against the actual page content.
    if ai.startswith("n"): continue
    if ai.endswith("n"):
        # Trailing "n" denotes a variable final digit: expand it.
        # NOTE(review): digit 0 is excluded by the original logic — confirm
        # whether codes like "3100" should also be generated.
        for digit in range(1, 10):
            apd_langs(f"{ai[:-1]}{digit}", row[1:])
    elif "-" in ai:
        # Numeric range such as "90-99": expand every code in the range.
        # BUGFIX: the original passed raw ints here, producing keys of a
        # different type than the other branches and losing zero-padding
        # (a range "00-09" would become 0..9). Format back to a string,
        # zero-padded to the width of the range start.
        st, ed = ai.split("-")
        width = len(st)
        for code in range(int(st), int(ed) + 1):
            apd_langs(f"{code:0{width}d}", row[1:])
    else:
        apd_langs(ai, row[1:])

print(titles)

# Persist the collected translations as UTF-8 JSON.
with open("gs1_ai_langs.json", mode="w", encoding='utf8') as w:
    w.write(json.dumps(ai_langs, ensure_ascii=False, indent=4))

print(ai_langs)
